library(tidyverse)
library(readxl)
library(ggplot2)
library(plotly)
library(ggpubr)
library(ggforce)
library(extrafont)
library(ggrepel)

Results: Details of files to analyse and run information

print(paste0("Report generated on:  ", Sys.Date()))
## [1] "Report generated on:  2022-03-25"
RawXLinkProphetOutput <- read.delim("Data/XLinkProphet/iprophet-xl.xls", stringsAsFactors = FALSE)

#generated by running convert convertXLinkProphetXLStoUpload6col.pl iprophet-xl.xls 0.3065 FASTA=C:\TPP\database\yeastproteome.fasta
Raw6colOutput <- read.delim("Data/XLinkProphet/iprophet-xl_6col.txt", header=FALSE, stringsAsFactors = FALSE)
colnames(Raw6colOutput) <- c("Pep1", "Accession1", "PepPos1", "Pep2", "Accession2", "PepPos2", "ProtPos1", "ProtPos2")

SampleName = "04112020_Hpm1KO_Spheroplast"

#probability from XLinkProphet for CSMs to get a 1% FDR. This is different in every sample.
probability_01 = 0.0883
probability_comp_01 = 0.3065

#MethylQuant confidence level
MethylQuantConfidence = "High"

Filtering the peptide prophet results of interest by the probablity as assigned by XLinkProphet

FilteredCSMs <- RawXLinkProphetOutput %>%
  filter(probability >= probability_01)

FilteredRedundantCrosslinks <- RawXLinkProphetOutput %>%
  filter(composite_probability >= probability_comp_01) %>%
  distinct()

Processing to combined IDs and generate different levels of redundancy

ReturnXL_Stripped <- function(PeptideWithMods) {
  StrippedPep <- str_remove_all(PeptideWithMods, "\\[(.*?)\\]")
       return(StrippedPep)
}

AlphabetizeXLKey <- function(XLPep1, XLPep2) {
  Interaction <- if_else(XLPep1 == XLPep2, paste0(XLPep1, "_", XLPep2), if_else(XLPep1 < XLPep2, paste0(XLPep1, "_", XLPep2), paste0(XLPep2, "_", XLPep1)))
    return(Interaction)
}

AddKey <- function( input_table, Level) {
  nrows <- nrow(input_table)
    if(Level == "XL")
  {
    for (i in 1:nrows) {
      input_table$Pep1[i] <- ReturnXL_Stripped(input_table$Pep1[i])
      input_table$Pep2[i] <- ReturnXL_Stripped(input_table$Pep2[i])
      input_table$XLPep1[i] <- paste0(input_table$Pep1[i], "-", input_table$PepPos1[i], "-", input_table$Accession1[i])
      input_table$XLPep2[i] <- paste0(input_table$Pep2[i], "-", input_table$PepPos2[i], "-", input_table$Accession2[i])
      input_table$XL_Key[i] <- AlphabetizeXLKey(input_table$XLPep1[i], input_table$XLPep2[i])
    }
    return(input_table)
  }
 
  else if (Level == "URP") {
    for (i in 1:nrows) {
      input_table$Pep1[i] <- ReturnXL_Stripped(input_table$Pep1[i])
      input_table$Pep2[i] <- ReturnXL_Stripped(input_table$Pep2[i])
      input_table$ResidueKey1[i] <- paste0(input_table$Accession1[i], "-", input_table$ProtPos1[i])
      input_table$ResidueKey2[i] <- paste0(input_table$Accession2[i], "-", input_table$ProtPos2[i])
      input_table$URP_Key[i] <- AlphabetizeXLKey(input_table$ResidueKey1[i], input_table$ResidueKey2[i])
    }
    return(input_table)
  }
 
  else if (Level == "PPI") {
    for (i in 1:nrows) {
      input_table$PPI_Key[i] <- AlphabetizeXLKey(input_table$Accession1[i], input_table$Accession2[i])
    }
    return(input_table)
  }
}

#Useful functions to return the 0-based position of the crosslink mod (325.13 = light PIR stump mass, 333.14 = heavy labelled stump mass)
#or strip the peptide sequence, as exported by XLinkProphet under the heading peptide1/peptide2, of any mods. Useful for XLinkDB upload.
ReturnXL_PepPos <- function(PeptideWithMods) {
    V1 <- str_replace(PeptideWithMods, "\\[325\\.13\\]", "x")
    V1 <- str_replace(V1, "\\[333\\.14\\]", "x")
    V1 <- str_remove_all(V1, "\\[(.*?)\\]")
    V1 <- str_locate(V1, "x")[1]

    #make 0-based modPos
    V1 <- V1 -2
    return(V1);
}

CleanXLinkProphet <- function(XlinkProphet_output) {
  nrows <- nrow(XlinkProphet_output)

  for (i in 1:nrows) {
  XlinkProphet_output$Accession1[i] <- str_remove(str_sub(XlinkProphet_output$protein1[i], 1, 9), "sp\\|" )
  XlinkProphet_output$Accession2[i] <-  str_remove(str_sub(XlinkProphet_output$protein2[i], 1, 9), "sp\\|" )
  XlinkProphet_output$PepPos1[i] <- ReturnXL_PepPos(XlinkProphet_output$peptide1[i])
  XlinkProphet_output$PepPos2[i] <- ReturnXL_PepPos(XlinkProphet_output$peptide2[i])
  XlinkProphet_output$Pep1[i] <- ReturnXL_Stripped(XlinkProphet_output$peptide1[i])
  XlinkProphet_output$Pep2[i] <- ReturnXL_Stripped(XlinkProphet_output$peptide2[i])
    }

  return(XlinkProphet_output);
}

CleanXlinkProphetTable <- AddKey(CleanXLinkProphet(FilteredCSMs), "XL")
FilteredRedundantCrosslinks <- AddKey(CleanXLinkProphet(FilteredRedundantCrosslinks), "XL")



Clean6colTable <- AddKey(Raw6colOutput, "PPI")
Clean6colTable <- AddKey(Clean6colTable, "URP")
Clean6colTable <- AddKey(Clean6colTable, "XL")
Clean6colTable <- Clean6colTable %>%  select(XL_Key, URP_Key, PPI_Key, ResidueKey1, ResidueKey2)

AnnotatedXLTable <- distinct(full_join(CleanXlinkProphetTable, Clean6colTable, by = "XL_Key" ))
write.table(AnnotatedXLTable, file = paste0(SampleName, "_AnnotatedXLTable.txt"), sep = "\t", row.names = FALSE, quote = FALSE)

Annotated information on PPI novelty

APID <- read.delim("Data/APID/559292_noISI_Q3_June2021.txt", stringsAsFactors=FALSE)

APID <- APID %>%
  filter(ExpEvidences >= 1) %>%
  rename(Accession1 = UniprotID_A, Accession2 = UniprotID_B) %>%
  mutate(KnownAPID = TRUE)

APID <- AddKey(APID, "PPI")


PPIs <- AnnotatedXLTable %>%
  filter(intra != 1) %>%
  filter(!is.na(PPI_Key)) %>%
  group_by(PPI_Key) %>%
  summarise(NumURPs = n_distinct(URP_Key), NumCSMs = n()) %>%
  distinct()

PPI_URPs <- AnnotatedXLTable %>%
  filter(intra != 1) %>%
    filter(!is.na(PPI_Key)) %>%
  select(PPI_Key, URP_Key) %>%
  distinct()

PPIs <- left_join(PPIs, APID, by = "PPI_Key")

PPIs <- PPIs %>%
  mutate(KnownAPID = replace_na(KnownAPID, FALSE)) %>%
  separate(PPI_Key, into = c("AccessionA", "AccessionB"), sep = "_",remove= FALSE )

PPI_URPs <- left_join(PPI_URPs, PPIs, by = "PPI_Key")

write.table(PPIs, file = "IdentifiedPPIs.txt", sep = "\t", quote = FALSE, row.names = FALSE)
write.table(PPI_URPs, file = "IdentifiedPPI_URPs.txt", sep = "\t", quote = FALSE, row.names = FALSE)

Generating a table of URPs.

URPs <- AnnotatedXLTable %>%
  filter(composite_probability >= probability_comp_01) %>%
  mutate(CrosslinkType = if_else(intra == 1, "Intra", "Inter")) %>%
  select(URP_Key, CrosslinkType) %>%
  separate(URP_Key, sep = "_", into = c("Residue1", "Residue2"), remove = FALSE) %>%
  separate(Residue1, sep = "-", into = c("Accession1", "Lys1"), remove = FALSE) %>%
  separate(Residue2, sep = "-", into = c("Accession2", "Lys2"), remove = FALSE) %>%
  distinct()  


write.table(URPs, file = "URPs.txt", row.names = FALSE, quote = FALSE, sep = "\t")

Make an input table for XLinkDB

XLinkDBTable <- AnnotatedXLTable %>%
  select(Pep1, Accession1, PepPos1, Pep2, Accession2, PepPos2) %>%
  distinct()

write.table(XLinkDBTable, file = paste0("XLinkDBInput_", SampleName, ".txt"), sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)  

Generating a table of CSMs compatible for analysis with MethylQuant Converting the calculated crosslink monoisotopic parent mass (theoretically determined by Comet when using C13 isotope error in search) to a m/z and calculate the theoretical isotope labelling pair. This is useful for use with MethylQuant, where you provide a list of m/z, charges, scans and mass differences (actually, a m/z difference calculated by taking into account the number of arginines and lysines present in the sequence, and the parent charge). Searching for m/z’s of real spectral identifications, and also for masses in other replicates (matches between runs idea).

TableForMethylQuant <- AnnotatedXLTable %>%
  mutate(isLIGHT_ID = str_detect(peptide1, "325.13")) %>%
  mutate(Sequence = str_replace_all(composite_id, "_", "")) %>%
  mutate(Sequence = str_replace_all(Sequence, "x", "")) %>%
  mutate(NumLysines = str_count(Sequence, "K")) %>%
  mutate(NumArginines = str_count(Sequence, "R")) %>%
  mutate(mono_parent_mass = peptide1_mass + peptide2_mass + 693.399601) %>%
  mutate(HeavyMZshift = ((NumLysines * 8.014199) + (NumArginines * 10.008269))/parent_charge) %>%
  mutate(MZ = (mono_parent_mass + (parent_charge * 1.00728))/parent_charge) %>%
  mutate(MZShift = ifelse(isLIGHT_ID == FALSE, HeavyMZshift * -1, HeavyMZshift)) %>%
  mutate(Modifications = "") %>%
  rename(Charge = `parent_charge`) %>%
  rename("Calc m/z" = MZ) %>%
  rename("Mass Difference" = MZShift) %>%
  mutate(realID = TRUE)

nrows <- nrow(TableForMethylQuant)
for (i in 1:nrows) {
  TableForMethylQuant$SpecID[i] <- tail(strsplit(TableForMethylQuant$spectrum[i],"\\\\")[[1]],1)
  TableForMethylQuant$'Data File'[i] <- paste0(str_split_fixed(TableForMethylQuant$SpecID[i], "\\.", n =4)[,1], ".raw")
  TableForMethylQuant$'Start Scan'[i] <- str_split_fixed(TableForMethylQuant$SpecID[i], "\\.", n =4)[,2]
}

TableForMethylQuant <- TableForMethylQuant %>%
  filter(!is.na(`Data File`))

DuplicateRV_1 <- TableForMethylQuant %>%
  filter(!(str_detect(`Data File`, "RV_") & str_detect(`Data File`, "_1.raw"))) %>%
  mutate(`Data File` = str_replace(`Data File`, "FWD", "RV")) %>%
  mutate(`Data File` = str_replace(`Data File`, "_2.raw", "_1.raw")) %>%
  select(Accession1:`Start Scan`, peptide1_mass, peptide2_mass, parent_mass, Charge) %>%
  select(-SpecID) %>%
  mutate(realID = FALSE)

DuplicateRV_2 <- TableForMethylQuant %>%
  filter(!(str_detect(`Data File`, "RV_") & str_detect(`Data File`, "_2.raw"))) %>%
  mutate(`Data File` = str_replace(`Data File`, "FWD", "RV")) %>%
  mutate(`Data File` = str_replace(`Data File`, "_1.raw", "_2.raw")) %>%
  select(Accession1:`Start Scan`, peptide1_mass, peptide2_mass, parent_mass, Charge) %>%
  select(-SpecID) %>%
  mutate(realID = FALSE)


DuplicateFWD_1 <- TableForMethylQuant %>%
  filter(!(str_detect(`Data File`, "FWD_") & str_detect(`Data File`, "_1.raw"))) %>%
  mutate(`Data File` = str_replace(`Data File`, "RV", "FWD")) %>%
  mutate(`Data File` = str_replace(`Data File`, "_2.raw", "_1.raw")) %>%
  select(Accession1:`Start Scan`, peptide1_mass, peptide2_mass, parent_mass, Charge) %>%
  select(-SpecID) %>%
  mutate(realID = FALSE)


DuplicateFWD_2 <- TableForMethylQuant %>%
  filter(!(str_detect(`Data File`, "FWD_") & str_detect(`Data File`, "_2."))) %>%
  mutate(`Data File` = str_replace(`Data File`, "RV", "FWD")) %>%
  mutate(`Data File` = str_replace(`Data File`, "_1.raw", "_2.raw")) %>%
  select(Accession1:`Start Scan`, peptide1_mass, peptide2_mass, parent_mass, Charge) %>%
  select(-SpecID) %>%
  mutate(realID = FALSE)

TableForMethylQuant <- bind_rows(TableForMethylQuant, DuplicateRV_1, DuplicateRV_2, DuplicateFWD_1, DuplicateFWD_2)

TableForMethylQuant <- TableForMethylQuant %>%
    select(Accession1:`Start Scan`, peptide1_mass, peptide2_mass, parent_mass, Charge) %>%
  distinct() %>%
  filter(!is.na(`Data File`)) %>%
  filter(!is.na(Sequence))

TableForMethylQuant <- distinct(TableForMethylQuant)

write.csv(TableForMethylQuant, file = paste0("TableForMethylQuant_IDsDuplicated", SampleName, ".csv"), row.names = FALSE, quote = TRUE)  

Protein level normalisation factor extrapolation. Taking the normalisation factor for crosslink ratios by extrapolating normalisation factor from MaxQuant results of the matched proteome analysis.

proteinGroupsFWD <- read.delim("Data/MaxQuant/FW/proteinGroups.txt", stringsAsFactors=FALSE)
proteinGroupsRV <- read.delim("Data/MaxQuant/RV/proteinGroups.txt", stringsAsFactors=FALSE)

proteinGroupsFWD <- proteinGroupsFWD %>%
  mutate(Replicate = "FWD")  

proteinGroupsRV <- proteinGroupsRV %>%
  mutate(Replicate = "RV")  

ProteinswithRatiosFWD <- proteinGroupsFWD %>%
  filter(Ratio.H.L<100)

FWD.meanProteinH.L = mean(ProteinswithRatiosFWD$Ratio.H.L)
FWD.meanProteinH.L
## [1] 0.6823332
ProteinswithRatiosRV <- proteinGroupsRV %>%
  filter(Ratio.H.L<100)

RV.meanProteinH.L = mean(ProteinswithRatiosRV$Ratio.H.L)
RV.meanProteinH.L
## [1] 0.6504095

MethylQuant output cleaning, to check the quantified species match input by comparing PPM, and also filtering quanification above a score of 10.

MethylQuantOutput <- read_csv("Data/MethylQuant/TableForMethylQuant_IDsDuplicated04112020_Hpm1KO_Spheroplast_MethylQuant.csv")
## Rows: 33733 Columns: 67
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): Accession1, Accession2, Pep1, Pep2, XLPep1, XLPep2, XL_Key, URP_Ke...
## dbl (49): PepPos1, PepPos2, NumLysines, NumArginines, mono_parent_mass, Heav...
## lgl  (3): isLIGHT_ID, Modifications, realID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
MethylQuantOutputLight <- MethylQuantOutput %>%
  filter(isLIGHT_ID == TRUE) %>%
  mutate(LightMZ =`Calc m/z`) %>%
  mutate(MethylQuantMassDiffLight = ((`Light m/z 1 IsotopeCorrelation`- LightMZ))) %>%  
  mutate(MethylQuantMassDiffLightPPM = abs(((`Light m/z 1 IsotopeCorrelation`- LightMZ)/LightMZ)*1000000) ) %>%
  mutate(HeavyMZ = `Calc m/z` + `Mass Difference`) %>%
  mutate(MethylQuantMassDiffHeavy = ((`Heavy m/z 1 IsotopeCorrelation`- HeavyMZ))) %>%  
  mutate(MethylQuantMassDiffHeavyPPM = abs(((`Heavy m/z 1 IsotopeCorrelation`-  HeavyMZ)/HeavyMZ)*1000000) )

MethylQuantOutputHeavy <- MethylQuantOutput %>%
  filter(isLIGHT_ID == FALSE) %>%
  mutate(LightMZ = `Calc m/z` + `Mass Difference`) %>%
  mutate(MethylQuantMassDiffLight = ((`Light m/z 1 IsotopeCorrelation`- LightMZ))) %>%  
  mutate(MethylQuantMassDiffLightPPM = abs(((`Light m/z 1 IsotopeCorrelation`- LightMZ)/LightMZ)*1000000) ) %>%
  mutate(HeavyMZ =  `Calc m/z`) %>%
  mutate(MethylQuantMassDiffHeavy = ((`Heavy m/z 1 IsotopeCorrelation`- HeavyMZ))) %>%  
  mutate(MethylQuantMassDiffHeavyPPM = abs(((`Heavy m/z 1 IsotopeCorrelation`-  HeavyMZ)/HeavyMZ)*1000000) )

MethylQuantOutput <- bind_rows(MethylQuantOutputLight, MethylQuantOutputHeavy)

MethylQuantOutput <- MethylQuantOutput %>%
  mutate(Iso2Light = LightMZ + (1.00866491588/Charge)) %>%
  mutate(Iso3Light = Iso2Light + (1.00866491588/Charge)) %>%
  mutate(Iso2LightPPM = ((Iso2Light -  `Light m/z 2 IsotopeCorrelation`)/Iso2Light)*100000) %>%
  mutate(Iso3LightPPM = ((Iso3Light -  `Light m/z 3 IsotopeCorrelation`)/Iso3Light)*100000) %>%
  mutate(Iso2Heavy = HeavyMZ + (1.00866491588/Charge)) %>%
  mutate(Iso3Heavy = Iso2Heavy + (1.00866491588/Charge)) %>%
  mutate(Iso2HeavyPPM = ((Iso2Heavy -  `Heavy m/z 2 IsotopeCorrelation`)/Iso2Heavy)*100000) %>%
  mutate(Iso3HeavyPPM = ((Iso3Heavy-  `Heavy m/z 3 IsotopeCorrelation`)/Iso3Heavy)*100000)


MethylQuantOutput <- MethylQuantOutput %>%
  mutate(Replicate = if_else(str_detect(`Data File`, "RV_"), "RV", "FWD")) %>%
  mutate(XL_ID = row_number())

PepsWithRatios <- MethylQuantOutput %>%
  filter(!is.na(`H/L Ratio #2`)) %>%
  filter(MethylQuantMassDiffHeavyPPM <= 10) %>%
  filter(MethylQuantMassDiffLightPPM <= 10) %>%
  filter(`MethylQuant Score` > 10)

Remove crosslinked peptides containing methionine oxidation, which may interfere with quantification

MethioninePeps <- AnnotatedXLTable %>%
  filter(str_detect(composite_id, "147.04"))

  for (i in 1:NROW(MethioninePeps)) {
     MethioninePeps$spectrum[i] = tail(strsplit(MethioninePeps$spectrum[i],"\\\\")[[1]],1)
}

PepsWithRatios <- anti_join(PepsWithRatios, MethioninePeps, by = c("SpecID" = "spectrum"))
FilteredRedundantCrosslinks <- FilteredRedundantCrosslinks %>%
  select(XL_Key) %>%
  mutate(ValidCrosslink = TRUE) %>%
  distinct()

PepsWithRatios <- inner_join(PepsWithRatios, FilteredRedundantCrosslinks, by = "XL_Key" )

PepsWithRatios <- distinct(PepsWithRatios)

Normalising SILAC ratios using protein level normalisation factor, which was was the mean protein H/L ratio from matched proteomics experiments. Averaging URP ratios to the biological level, and only considering URP that were quantified in both replicates. Annotating information about key covariates for use BL multiple testing correction.

URPsQuantified <- PepsWithRatios %>%
  select(XL_Key, URP_Key, PPI_Key,  `Data File`, Replicate, `H/L Ratio #2`, `MethylQuant Score`,`# Good Elution Profile Correlations`) %>%
  #picking the best ratio measurement for cross-linked peptides across technical replicates
  group_by(XL_Key, URP_Key, PPI_Key,  `Data File`, Replicate) %>%
  top_n(n = 1, wt = `MethylQuant Score`) %>%
  distinct() %>%
  ungroup() %>%
  #performing normalization to account for uneven mixing
  mutate(NormCrosslinkRatio = if_else(Replicate == "RV", `H/L Ratio #2`/RV.meanProteinH.L, `H/L Ratio #2`/FWD.meanProteinH.L)) %>%
  #log transforming ratios
  mutate(NormCrosslinkLog2Ratio = log2(NormCrosslinkRatio)) %>%
  #accounting for label swap
  mutate(NormCrosslinkLog2_KO_WT_Ratio = if_else(Replicate == "RV", NormCrosslinkLog2Ratio * -1, NormCrosslinkLog2Ratio )) %>%
  #summarising to biological replicate level
  group_by(URP_Key, PPI_Key, Replicate) %>%
  summarise(
  #collapsing redundant ratios for a URP to one ratio per biological replicate
            AvgCorrectXLRatio = mean(NormCrosslinkLog2_KO_WT_Ratio),
            NumBioReplicates = n_distinct(Replicate),
            #collecting data for covariates for BL analysis, across technical replicate measurements
            NumRatios = n(),
            All_Tech_Rep_Ratios = list(NormCrosslinkLog2_KO_WT_Ratio),
            MaxMethylQuantScore = max(`MethylQuant Score`),
            MaxNumberGoodElutionProfile = max(`# Good Elution Profile Correlations`)) %>%
  ungroup() %>%
  separate(URP_Key, c("ResidueKey1", "ResidueKey2"), sep = "_", remove = FALSE)  %>%
  separate(ResidueKey1, c("Accession1", "Residue1"), sep = "-", remove = FALSE) %>%
  separate(ResidueKey2, c("Accession2", "Residue2"), sep = "-", remove = FALSE) %>%
  filter(NumRatios > 0)
## `summarise()` has grouped output by 'URP_Key', 'PPI_Key'. You can override
## using the `.groups` argument.
URPsQuantifiedInBoth <- URPsQuantified %>% 
  #collapsing replicate ratios
  group_by(URP_Key, PPI_Key, Accession1, Accession2) %>%
  summarise(MeanLog2Ratio = mean(AvgCorrectXLRatio),
            sd = sd(AvgCorrectXLRatio),
            #for t-test
            Ratios = list(AvgCorrectXLRatio),
            NumBioReplicates = n_distinct(Replicate),
            #biological replicate mean ratios
            RatiosWritten = paste(round(AvgCorrectXLRatio, digits = 2), collapse=',' ),
            #all ratios collected
            AllQuantifiedRatios = list(flatten_dbl(All_Tech_Rep_Ratios)),
            #generating finalised covariates for BL correction
            sd_AllQuantifiedRatios = sd(flatten_dbl(All_Tech_Rep_Ratios)),
            n_AllQuantifiedRatios = sum(NumRatios),
            max_n_GoodIsoElutionProfiles_AllQuantifiedRatios = max(MaxNumberGoodElutionProfile)
    ) %>%
  ungroup() %>%
  #only taking URPs successfully quantified in both replicates
  filter(NumBioReplicates == 2)
## `summarise()` has grouped output by 'URP_Key', 'PPI_Key', 'Accession1'. You can
## override using the `.groups` argument.

Performing two-tailed, unpaired t-tests to compare crosslink ratios to a ratio value of 0 (no change).

URPsQuantifiedInBoth$Pval = NA

for (i in 1:NROW(URPsQuantifiedInBoth)){

  Ratios = unlist(URPsQuantifiedInBoth$Ratios[i])
  TTestResult = t.test(Ratios, mu = 0, alternative = "two.sided")
  URPsQuantifiedInBoth$Pval[i] = TTestResult$p.value[1]

}

Performing multiple testing correction using the BL method from the swFDR package, which uses covariate measures to calculated a q value for multiple testing correction

covariates<- URPsQuantifiedInBoth %>%
  ungroup() %>%
  select(sd_AllQuantifiedRatios, n_AllQuantifiedRatios, max_n_GoodIsoElutionProfiles_AllQuantifiedRatios)

library(swfdr)
lm <- lm_pi0(URPsQuantifiedInBoth$Pval, X=covariates, smooth.df = 2, type = "linear", smoothing = "smooth.spline")
qVal <- lm_qvalue(URPsQuantifiedInBoth$Pval, pfdr = TRUE, pi0 = lm,smoothing = "smooth.spline")


URPsQuantifiedInBoth$qvalue <- qVal$qvalues

URPsQuantifiedInBoth %>% 
  filter(qvalue <= 0.05) %>% 
  filter(abs(MeanLog2Ratio) >= 0.3)

Annotating information about proteins from UNIPROT

UNIPROT <- read.delim("Data/UniProt/UNIPROT_Yeast.txt", stringsAsFactors=FALSE)
UNIPROT <- UNIPROT %>%
  mutate(Gene.names.primary.1 = paste0(str_to_sentence(Gene.names...primary.., locale = "en"), "p")) %>%
  rename(Entry.name.1 = Entry.name) %>%
  select(Entry, Entry.name.1, Gene.names.primary.1)


URPsQuantifiedInBoth <- left_join(URPsQuantifiedInBoth, UNIPROT, by = c("Accession1" = "Entry"))

UNIPROT <- UNIPROT %>%
  rename(Entry.name.2 = Entry.name.1,
         Gene.names.primary.2 = Gene.names.primary.1)
URPsQuantifiedInBoth <- left_join(URPsQuantifiedInBoth, UNIPROT, by = c("Accession2" = "Entry"))

Generating volcano plot. Purposely excluding one extreme value for visibility sake

ForVolcano <- URPsQuantifiedInBoth %>%
    mutate(log2qvalue = -1* log2(qvalue)) %>%
    mutate(Direction = case_when(
    (qvalue <= 0.05) & (MeanLog2Ratio >= 0.3) ~ "Up",
    (qvalue <= 0.05) & (MeanLog2Ratio <= -0.3) ~ "Down",
    (qvalue > 0.05)  ~"Not")) %>%
    mutate(LabelName = paste0(Gene.names.primary.1, "-\n", Gene.names.primary.2)) %>%
    #mutate(LabelName =  Gene.names.primary.1) %>%
  mutate(Label = if_else(((qvalue< 0.05) & (abs(MeanLog2Ratio) >= 0.3)), TRUE, FALSE)) %>%
    mutate(Alpha = if_else(((qvalue< 0.05) & (abs(MeanLog2Ratio) >= 0.3)), TRUE, FALSE))



G <- ggplot(ForVolcano, aes(x = MeanLog2Ratio, y = log2qvalue,  color = Direction , alpha = Alpha, label = LabelName)) +
  geom_jitter() +
  #geom_text_repel(data = subset(ForVolcano, Label == TRUE), size = 3) +
  scale_color_manual(values = c( "#0000FF", "#bababa", "#D90000")) +
  theme_bw() +
  theme(legend.position = "none",
        text=element_text(size=10,  family="Arial"),
        panel.border = element_rect(fill=NA, colour = "black", size=1),
        panel.grid.minor = element_blank()) +
  xlim(-2.,2.3)#+
 #ylim(0, 2)
  #scale_y_continuous(trans=scales::pseudo_log_trans(base = 10))


ggplotly(G)
## Warning: Using alpha for a discrete variable is not advised.
ggsave(filename = "CrosslinkVolcano.pdf", device = cairo_pdf,
       width = 3, height = 3, units = "in")
## Warning: Using alpha for a discrete variable is not advised.
## Warning: Removed 37 rows containing missing values (geom_point).

Grouping URPs by signifcance level

SignificantlyChanging_URPs <- URPsQuantifiedInBoth %>%
    filter(qvalue <= 0.05) %>% 
  filter(abs(MeanLog2Ratio) >= 0.3)

SignificantlyChanging_URPs
NonChanging_URPS <- URPsQuantifiedInBoth %>%
  filter(qvalue > 0.05)
NonChanging_URPS

Annoting protein level change to changing URPs

proteinGroupsCombined <- bind_rows(proteinGroupsFWD, proteinGroupsRV)

filteredProteinGroups <- proteinGroupsCombined %>%
  separate_rows(Majority.protein.IDs, sep =";") %>%
  filter(!is.na(Ratio.H.L.normalized)) %>%
  filter(Ratio.H.L.variability.... < 30) %>%
  filter(!str_detect(Protein.IDs, "CON_")) %>%
  filter(!str_detect(Protein.IDs, "REV_")) %>%
  mutate(Accession1 =  str_remove(str_sub(Majority.protein.IDs, 1, 9), "sp\\|" )) %>%
  rename(NormProteinRatio = Ratio.H.L.normalized) %>%
  mutate(NormProteinLog2Ratio = log2(NormProteinRatio)) %>%
  mutate(NormProteinLog2_KO_WT_ratio = if_else(Replicate == "RV", NormProteinLog2Ratio * -1, NormProteinLog2Ratio)) %>%
  select(NormProteinLog2_KO_WT_ratio, Accession1, Fasta.headers, Ratio.H.L, Ratio.H.L.variability...., Ratio.H.L.count, Replicate)

#average over replicates
MeanRatioProteinLevel <- filteredProteinGroups %>%
  group_by(Accession1) %>%
  summarise(Mean_NormProteinLog2_KO_WT_ratio.Protein.1 = mean(NormProteinLog2_KO_WT_ratio))

SignificantlyChanging_URPs <- left_join(SignificantlyChanging_URPs, MeanRatioProteinLevel, by = c("Accession1" = "Accession1"))

MeanRatioProteinLevel <- MeanRatioProteinLevel %>%
  rename(Mean_NormProteinLog2_KO_WT_ratio.Protein.2 = Mean_NormProteinLog2_KO_WT_ratio.Protein.1)
SignificantlyChanging_URPs <- left_join(SignificantlyChanging_URPs, MeanRatioProteinLevel, by = c("Accession2" = "Accession1"))

Comparing URP ratios to protein level changes, two-tailed, unpaired t-test, with “fdr” method of multiple testing comparison

SignificantlyChanging_URPs_withProteinQuant <- SignificantlyChanging_URPs %>%
  filter(!is.na(Mean_NormProteinLog2_KO_WT_ratio.Protein.1)) %>%
  filter(!is.na(Mean_NormProteinLog2_KO_WT_ratio.Protein.2))


for (i in 1:NROW(SignificantlyChanging_URPs_withProteinQuant)){

  Ratios = unlist(SignificantlyChanging_URPs_withProteinQuant$Ratios[i])
 
  Test.1 = t.test(Ratios, mu = SignificantlyChanging_URPs_withProteinQuant$Mean_NormProteinLog2_KO_WT_ratio.Protein.1[i], alternative = "two.sided")
  SignificantlyChanging_URPs_withProteinQuant$Pval.againstProtein.1[i] = Test.1$p.value[1]
 
  Test.2 = t.test(Ratios, mu = SignificantlyChanging_URPs_withProteinQuant$Mean_NormProteinLog2_KO_WT_ratio.Protein.2[i], alternative = "two.sided")
  SignificantlyChanging_URPs_withProteinQuant$Pval.againstProtein.2[i] = Test.2$p.value[1]
}
## Warning: Unknown or uninitialised column: `Pval.againstProtein.1`.
## Warning: Unknown or uninitialised column: `Pval.againstProtein.2`.
covariates<- SignificantlyChanging_URPs_withProteinQuant %>%
  ungroup() %>%
  select(sd_AllQuantifiedRatios, n_AllQuantifiedRatios, max_n_GoodIsoElutionProfiles_AllQuantifiedRatios)

library(swfdr)

lm <- lm_pi0(SignificantlyChanging_URPs_withProteinQuant$Pval.againstProtein.1, X=covariates, smooth.df = 2, smoothing = "smooth.spline")
## Warning: maximal p is smaller than maximal lambda
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
qVal <- lm_qvalue(SignificantlyChanging_URPs_withProteinQuant$Pval.againstProtein.1, pfdr = TRUE, pi0 = lm)
SignificantlyChanging_URPs_withProteinQuant$qvalue_Protein1 <-  qVal$qvalues

lm <- lm_pi0(SignificantlyChanging_URPs_withProteinQuant$Pval.againstProtein.2, X=covariates, smooth.df = 2, smoothing = "smooth.spline")
qVal <- lm_qvalue(SignificantlyChanging_URPs_withProteinQuant$Pval.againstProtein.2, pfdr = TRUE, pi0 = lm)
SignificantlyChanging_URPs_withProteinQuant$qvalue_Protein2 <-  qVal$qvalues

Generating a list of significantly changing URPs (at crosslink, and compared to protein level) for figure making in Cytoscape

ForFig <- SignificantlyChanging_URPs_withProteinQuant %>%
  filter(qvalue_Protein1 <= 0.05) %>%
  filter(qvalue_Protein2 <= 0.05) %>%
  select(-Ratios, - AllQuantifiedRatios) %>%
  separate(URP_Key, sep = "_", into = c("node1", "node2"), remove = FALSE)

MeanRatioProteinLevel <- MeanRatioProteinLevel %>%
  mutate(Proteinlog2 = Mean_NormProteinLog2_KO_WT_ratio.Protein.2)

ForNodes <- as_data_frame(c(ForFig$node1, ForFig$node2))
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
ForNodes <- ForNodes %>% 
  separate(value, into = c("Protein", "Residue"), remove = FALSE, sep = "-") %>% 
  rename(Node = value)
 

write.table(MeanRatioProteinLevel, file = "ProteinsForCytoscape.txt", sep = "\t", row.names = FALSE, quote = FALSE)
write.table(ForNodes, file = "NodeForCytoscape.txt", sep = "\t", row.names = FALSE, quote = FALSE)
write.table(ForFig, file = "CrosslinksForCytoscape.txt", sep = "\t", row.names = FALSE, quote = FALSE)

Generating supp table info

ForSupp <- URPsQuantifiedInBoth %>% 
  select(URP_Key, Gene.names.primary.1, Gene.names.primary.2, MeanLog2Ratio, sd, RatiosWritten, Pval, qvalue, ) %>% 
  separate(URP_Key, into = c("A", "B"), sep = "_") %>% 
  separate(A, into = c("Protein.1", "Residue.1"), sep = "-") %>% 
  separate(B, into = c("Protein.2", "Residue.2"), sep = "-") %>% 
  rename(mean.URP.log2ratio =MeanLog2Ratio,
         sd.URP.log2ratios = sd,
         biologicalreplicates.log2ratios = RatiosWritten,
         pvalue = Pval) %>% 
  mutate(mean.URP.log2ratio = round(mean.URP.log2ratio, 2),
         sd.URP.log2ratios = round(sd.URP.log2ratios, 2),
         pvalue =  round(pvalue,2),
         qvalue = round(qvalue, 2))

MeanRatioProteinLevel <- MeanRatioProteinLevel %>% 
  select(Accession1, Proteinlog2) %>% 
  mutate(protein1.log2ratio = round(Proteinlog2, 2)) %>% 
  select(-Proteinlog2)

ForSupp <- left_join(ForSupp, MeanRatioProteinLevel, by =c("Protein.1" ="Accession1" ))

MeanRatioProteinLevel <- MeanRatioProteinLevel %>% 
  rename(protein2.log2ratio = protein1.log2ratio)
ForSupp <- left_join(ForSupp, MeanRatioProteinLevel, by =c("Protein.2" ="Accession1" ))

write.table(ForSupp, file = "Tab1_TableS4.txt", row.names = FALSE, quote = FALSE, sep = "\t")

URPsSigForSupp <- SignificantlyChanging_URPs_withProteinQuant %>% 
  filter(qvalue_Protein1 <= 0.05) %>% 
  filter(qvalue_Protein2 <= 0.05) %>% 
  select(URP_Key, Gene.names.primary.1, Gene.names.primary.2, MeanLog2Ratio, sd,RatiosWritten, Pval, qvalue,  Mean_NormProteinLog2_KO_WT_ratio.Protein.1, Mean_NormProteinLog2_KO_WT_ratio.Protein.2,       Pval.againstProtein.1, Pval.againstProtein.2, qvalue_Protein1, qvalue_Protein2) %>% 
  separate(URP_Key, into = c("A", "B"), sep = "_") %>% 
  separate(A, into = c("Protein.1", "Residue.1"), sep = "-") %>% 
  separate(B, into = c("Protein.2", "Residue.2"), sep = "-") %>% 
  rename(mean.URP.log2ratio =MeanLog2Ratio,
         sd.URP.log2ratios = sd,
         biologicalreplicates.log2ratios = RatiosWritten,
         pvalue = Pval,
         protein1.log2ratio = Mean_NormProteinLog2_KO_WT_ratio.Protein.1,
         protein2.log2ratio = Mean_NormProteinLog2_KO_WT_ratio.Protein.2,
         protein1.URPtoProt.pvalue = Pval.againstProtein.1,
         protein2.URPtoProt.pvalue = Pval.againstProtein.2,
         protein1.URPtoProt.qvalue = qvalue_Protein1,
         protein2.URPtoProt.qvalue =qvalue_Protein2
         ) %>% 
  mutate(mean.URP.log2ratio = round(mean.URP.log2ratio, 2),
         sd.URP.log2ratios = round(sd.URP.log2ratios, 2),
         pvalue =  round(pvalue,2),
         qvalue = round(qvalue, 2),
         protein1.log2ratio = round(protein1.log2ratio,2),
         protein2.log2ratio = round(protein2.log2ratio, 2),
         protein1.URPtoProt.pvalue = round(protein1.URPtoProt.pvalue,2),
         protein2.URPtoProt.pvalue = round(protein2.URPtoProt.pvalue,2),
         protein1.URPtoProt.qvalue= round(protein1.URPtoProt.qvalue,2),
         protein2.URPtoProt.qvalue = round(protein2.URPtoProt.qvalue,2)
         )


URPsSigForSupp
write.table(URPsSigForSupp, file = "Tab2_TableS4.txt", row.names = FALSE, quote = FALSE, sep = "\t")

Investigating URPs with signal in only one strain, but also replicatable across label swap replicates

PepsWithoutRatios <- anti_join(MethylQuantOutput, PepsWithRatios, by = "URP_Key")

PepsWithOnlyOneChanelOnly <- PepsWithoutRatios %>%
  separate(ResidueKey1, c("Accession1", "Residue1"), sep = "-", remove = FALSE) %>%
  separate(ResidueKey2, c("Accession2", "Residue2"), sep = "-", remove = FALSE) %>%
  distinct() %>%
  mutate(TotalLightIntensity = `Light Intensity 1 IsotopeCorrelation` + `Light Intensity 2 IsotopeCorrelation` + `Light Intensity 3 IsotopeCorrelation`) %>%
  mutate(TotalHeavyIntensity = `Heavy Intensity 1 IsotopeCorrelation` + `Heavy Intensity 2 IsotopeCorrelation` + `Heavy Intensity 3 IsotopeCorrelation` ) %>%
  mutate(OnlyHeavy = if_else(TotalLightIntensity == 0 & TotalHeavyIntensity !=0, TRUE, FALSE)) %>%
  mutate(OnlyLight = if_else(TotalHeavyIntensity == 0 & TotalLightIntensity !=0, TRUE, FALSE)) %>%
  group_by(URP_Key, Replicate, Accession1, Residue1, Accession2, Residue2) %>%
  distinct() %>%
  summarise(AllOnlyLight = all(OnlyLight),
            AllOnlyHeavy = all(OnlyHeavy)) %>%
  ungroup() %>%
  mutate(UniqueToOneChannel = AllOnlyLight + AllOnlyHeavy) %>%
  filter(UniqueToOneChannel == 1) %>%
  group_by(URP_Key, Accession1, Residue1, Accession2, Residue2) %>%  
  summarise(NumberReps = n(), SumAllLight = sum(AllOnlyLight), SumAllHeavy = sum(AllOnlyHeavy)) %>%
  ungroup()
## `summarise()` has grouped output by 'URP_Key', 'Replicate', 'Accession1',
## 'Residue1', 'Accession2'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'URP_Key', 'Accession1', 'Residue1',
## 'Accession2'. You can override using the `.groups` argument.
PepsWithOneChanelOnly_OppositeInLabelSwap <- PepsWithOnlyOneChanelOnly %>%
  filter(NumberReps == 2, SumAllLight == 1, SumAllHeavy == 1)

NROW(PepsWithOneChanelOnly_OppositeInLabelSwap)
## [1] 0

To determine degree of label mixing in crosslinked spectra

Peps <- read.delim("Data/XLinkProphet/iprophet.pep.xml.tsv")

#1% FDR at peptide level controlled by iProphet in TPP
Filtered <- Peps %>% 
  filter(probability >= 0.6909)

Light <- Filtered %>% 
  filter(str_detect(modified_peptide, "325"))

Heavy <- Filtered %>% 
  filter(str_detect(modified_peptide, "333"))

Light_scans = unique(Light$spectrum)
Heavy_scans = unique(Heavy$spectrum)

#total number of scans with IDs
length(unique(Filtered$spectrum))
## [1] 22973
#total numbers of scans that had both heavy and light IDd assigned to them
intersect(Light_scans, Heavy_scans)
##  [1] "04112020_Hpm1KO_FWD_F10_XL_5uL_1_000_A.19365.19365.1"   
##  [2] "04112020_Hpm1KO_FWD_F10_XL_5uL_1_000_A.21894.21894.1"   
##  [3] "04112020_Hpm1KO_FWD_F10_XL_5uL_1_000_A.27696.27696.1"   
##  [4] "04112020_Hpm1KO_FWD_F10_XL_5uL_2_000_B.19311.19311.1"   
##  [5] "04112020_Hpm1KO_FWD_F10_XL_5uL_2_000_B.20812.20812.1"   
##  [6] "04112020_Hpm1KO_FWD_F10_XL_5uL_2_000_B.20822.20822.1"   
##  [7] "04112020_Hpm1KO_FWD_F10_XL_5uL_2_000_B.20838.20838.1"   
##  [8] "04112020_Hpm1KO_FWD_F10_XL_5uL_2_000_B.22835.22835.1"   
##  [9] "04112020_Hpm1KO_FWD_F11_XL_5uL_1_000_B.18618.18618.1"   
## [10] "04112020_Hpm1KO_FWD_F11_XL_5uL_1_000_B.18624.18624.1"   
## [11] "04112020_Hpm1KO_FWD_F11_XL_5uL_1_000_B.18659.18659.1"   
## [12] "04112020_Hpm1KO_FWD_F11_XL_5uL_1_000_B.18808.18808.1"   
## [13] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.18574.18574.1"   
## [14] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.18613.18613.1"   
## [15] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.18720.18720.1"   
## [16] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.18785.18785.1"   
## [17] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.19778.19778.1"   
## [18] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_006_B.19778.19778.1"   
## [19] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.22909.22909.1"   
## [20] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.24052.24052.1"   
## [21] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.25681.25681.1"   
## [22] "04112020_Hpm1KO_FWD_F12_XL_5uL_1_000_B.19435.19435.1"   
## [23] "04112020_Hpm1KO_FWD_F12_XL_5uL_1_000_B.20809.20809.1"   
## [24] "04112020_Hpm1KO_FWD_F12_XL_5uL_2_000_B.19541.19541.1"   
## [25] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_1_000_B.18892.18892.1"
## [26] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_1_000_A.21339.21339.1"
## [27] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_1_000_B.23808.23808.1"
## [28] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_1_000_A.28845.28845.1"
## [29] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_2_000_B.15021.15021.2"
## [30] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_2_003_A.23098.23098.1"
## [31] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_2_000_B.23750.23750.1"
## [32] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_2_003_A.28893.28893.1"
## [33] "04112020_Hpm1KO_FWD_F8_9_XL_5uL_2_001_A.25380.25380.1"  
## [34] "04112020_Hpm1KO_RV_F10_XL_5uL_1_000_B.18595.18595.1"    
## [35] "04112020_Hpm1KO_RV_F10_XL_5uL_1_002_B.20810.20810.1"    
## [36] "04112020_Hpm1KO_RV_F10_XL_5uL_2_000_B.18393.18393.1"    
## [37] "04112020_Hpm1KO_RV_F10_XL_5uL_2_000_B.18399.18399.1"    
## [38] "04112020_Hpm1KO_RV_F10_XL_5uL_2_000_B.18596.18596.1"    
## [39] "04112020_Hpm1KO_RV_F10_XL_5uL_2_000_A.21083.21083.1"    
## [40] "04112020_Hpm1KO_RV_F11_XL_5uL_1_000_B.18193.18193.1"    
## [41] "04112020_Hpm1KO_RV_F11_XL_5uL_1_000_B.19327.19327.1"    
## [42] "04112020_Hpm1KO_RV_F11_XL_5uL_2_000_B.18188.18188.1"    
## [43] "04112020_Hpm1KO_RV_F11_XL_5uL_2_000_B.18295.18295.1"    
## [44] "04112020_Hpm1KO_RV_F11_XL_5uL_2_000_B.18320.18320.1"    
## [45] "04112020_Hpm1KO_RV_F11_XL_5uL_2_001_B.19348.19348.1"    
## [46] "04112020_Hpm1KO_RV_F11_XL_5uL_2_000_B.19348.19348.1"    
## [47] "04112020_Hpm1KO_RV_F11_XL_5uL_2_000_B.19351.19351.1"    
## [48] "04112020_Hpm1KO_RV_F11_XL_5uL_2_000_B.19356.19356.1"    
## [49] "04112020_Hpm1KO_RV_F11_XL_5uL_2_001_B.20127.20127.1"    
## [50] "04112020_Hpm1KO_RV_F12_XL_5uL_2_001_A.38853.38853.1"    
## [51] "04112020_Hpm1KO_RV_F13_15_XL_5uL_2_000_A.11206.11206.1"

For Supp comparison on replicates

FWD_URPs_quantified <- URPsQuantified %>% 
  filter(Replicate == "FWD") %>% 
  select(URP_Key, Replicate, AvgCorrectXLRatio)

RV_URPs_quantified <- URPsQuantified %>% 
  filter(Replicate == "RV") %>% 
  select(URP_Key, Replicate, AvgCorrectXLRatio) 

URPs_Both_SuppFig <- left_join(FWD_URPs_quantified, RV_URPs_quantified, by = "URP_Key")


SigURPs <- ForFig %>% 
  select(URP_Key, MeanLog2Ratio) %>% 
  mutate(Sig = TRUE)

URPs_Both_SuppFig <- left_join(URPs_Both_SuppFig, SigURPs, by = "URP_Key")


URPs_Both_SuppFig <- URPs_Both_SuppFig %>% 
  filter(!is.na(Replicate.x)) %>% 
  filter(!is.na(Replicate.y)) %>% 
    mutate(Direction = case_when(
    (MeanLog2Ratio >= 0.3) & (Sig == TRUE) ~ "Up",
    (MeanLog2Ratio <= -0.3) & (Sig == TRUE) ~ "Down")) %>%
    mutate(Direction = replace_na(Direction, "Not"),
           Sig = replace_na(Sig, FALSE)) 

ggplot(URPs_Both_SuppFig,aes(x = AvgCorrectXLRatio.x, y = AvgCorrectXLRatio.y, color = Direction)) +
  geom_jitter() +
    scale_color_manual(values = c( "#0000FF", "#bababa", "#D90000")) +
    theme_bw() +
  theme(legend.position = "none",
        text=element_text(size=10,  family="Arial"),
        panel.border = element_rect(fill=NA, colour = "black", size=1),
        panel.grid.minor = element_blank()) +
    xlim(-4, 4) +
  ylim(-4,4) +
  geom_abline(intercept =0 , slope = 1)
## Warning: Removed 2 rows containing missing values (geom_point).

ggsave(filename = "S4_URP.pdf", device = cairo_pdf,
       width = 3.5, height = 3.5, units = "in")
## Warning: Removed 2 rows containing missing values (geom_point).
FWD_prot_quantified <- filteredProteinGroups %>% 
  filter(Replicate == "FWD") %>% 
  select( Accession1, NormProteinLog2_KO_WT_ratio) 

RV_prot_quantified <- filteredProteinGroups %>% 
  filter(Replicate == "RV") %>% 
  select( Accession1, NormProteinLog2_KO_WT_ratio) 

  
Prot_Both_quantified <- left_join(FWD_prot_quantified, RV_prot_quantified, by = "Accession1")

Prot_Both_quantified <- Prot_Both_quantified %>% 
  filter(!is.na(NormProteinLog2_KO_WT_ratio.x)) %>% 
  filter(!is.na(NormProteinLog2_KO_WT_ratio.y)) 

Prot <- ggplot(Prot_Both_quantified,aes(x = NormProteinLog2_KO_WT_ratio.x, y = NormProteinLog2_KO_WT_ratio.y)) +
  geom_jitter(color ="#bababa" ) +
    theme_bw() +
  theme(legend.position = "none",
        text=element_text(size=10,  family="Arial"),
        panel.border = element_rect(fill=NA, colour = "black", size=1),
        panel.grid.minor = element_blank()) +
    xlim(-4, 4) +
  ylim(-4, 4) +
  geom_abline(intercept =0 , slope = 1)
Prot

ggsave(filename = "S5_Prot.pdf", device = cairo_pdf,
       width = 3.5, height = 3.5, units = "in")